*! version 2.23.2 10nov2017
program define fegen_group
	* Create variable `name' holding consecutive integer group identifiers
	* for the variables listed in args().
	*
	* Options read by this program:
	*   name(string)    name of the variable to create
	*   args(string)    the varlist to group by (re-parsed with -syntax- below)
	*   missing         treat missing values as ordinary values forming groups;
	*                   without it, observations with missing values in the
	*                   varlist receive a missing identifier
	*   method(string)  "", stata, mata, hash0, hash1 or gtools
	*   ratio(string)   passed through to Mata factor(); "." when omitted
	*   nosort          passed through to Mata factor() as sort=0
	*   verbose         print which method was used
	* by() raises an error if specified. type(), label, lname() and truncate()
	* are accepted but not used here (see TODO).
	syntax [if] [in] , [by(varlist) type(string)] /// -by- is ignored
		name(string) args(string) ///
		[Missing Label LName(name) Truncate(numlist max=1 int >= 1) ///
		Ratio(string) Verbose METhod(string) noSORT]

	* TODO: support label lname truncate

	loc verbose = ("`verbose'" != "")
	loc sort = ("`sort'" != "nosort")
	_assert inlist("`method'", "", "stata", "mata", "hash0", "hash1", "gtools")
	_assert ("`by'" == ""), msg("by() not supported")
	if ("`ratio'"=="") loc ratio .

	* Re-parse the function arguments together with the caller's if/in so that
	* `varlist' is expanded and marksample can be used below
	local 0 `args' `if' `in'
	syntax varlist [if] [in]

	// ----------------

	* If varlist mixes strings and numerics, the stata method must be used.
	* This is computed up front because it also disables the sorted shortcut.
	loc n1 0
	loc n2 0

	foreach var of local varlist {
		* (named vartype so the type() option local is not overwritten)
		loc vartype : type `var'
		if (substr("`vartype'", 1, 3) == "str") {
			loc ++n1
		}
		else {
			loc ++n2
		}
	}

	loc problem = (`n1' > 0) & (`n2' > 0)

	// ----------------

	loc is_sorted = ("`: sortedby'" == "`varlist'") | (strpos("`: sortedby'", "`varlist' ")==1)
	* Note: we need the space after `varlist' to prevent "id10" being matched with "id1"

	* Compound quotes: an -if- expression may itself contain double quotes
	loc has_ifin = (`"`if'`in'"' != "")

	* The sorted shortcut handles missing values by itself but knows nothing
	* about if/in, and is only taken when the user did not request a Mata method
	loc shortcut = `is_sorted' & !`has_ifin' & !`problem' & inlist("`method'", "", "stata")

	if (`shortcut') {
		* Shortcut if already sorted
		loc method stata_sorted
	}
	else if ("`missing'" == "") {
		* Every other method relies on touse to drop observations with missing
		* values, so it must be built whenever -missing- was not requested
		* (regardless of sort order or of if/in being present)
		marksample touse, strok
	}
	else if (`has_ifin') {
		* Missing values are kept as groups; only honor if/in
		marksample touse, strok novarlist
	}

	* Choose method if not provided
	if ("`method'" == "") {
		loc usemata = (c(N) > 5e5) | (c(k) * c(N) > 5e6) | ("`touse'" != "")
		loc method = cond(`usemata', "mata", "stata")
	}

	* Mixed string/numeric varlists always use the stata method
	* (`shortcut' is 0 in this case, so touse was built above if it is needed)
	if (`problem') {
		loc method stata
	}

	// ----------------

	if ("`method'" == "stata") {
		Group_FirstPrinciples `varlist' , id(`name') ///
			touse(`touse') verbose(`verbose')
	}
	else if ("`method'" == "stata_sorted") {
		Group_FirstPrinciplesSorted `varlist' , id(`name') ///
			missing("`missing'") verbose(`verbose')
	}
	else {
		* Run both Mata calls under capture so that F is always dropped,
		* then re-raise whatever return code they produced
		cap noi {
			mata: F = factor("`varlist'", "`touse'", `verbose', "`method'", `sort', 0, `ratio', 0)
			mata: F.store_levels("`name'")
		}
		loc rc = c(rc)
		cap mata: mata drop F
		error `rc'
	}
	la var `name' "group(`varlist')"
end


program define Group_FirstPrinciples, sortpreserve
	* Number the distinct combinations of `varlist' 1, 2, 3, ... by sorting on
	* them, flagging the first observation of each combination and taking the
	* running sum of the flags. The result is stored in the new variable `id'.
	* When touse() names a variable, only observations where it is nonzero are
	* numbered; all others get a missing `id'.
	* The original sort order is restored on exit (sortpreserve).
	syntax varlist, id(name) [touse(string) Verbose(integer 0)]

	if (`verbose') {
		display as smcl "{txt}(method: {res}stata{txt})"
	}

	local restricted = ("`touse'" != "")

	* Step 1: flag the first observation of every group
	if (`restricted') {
		* Sorting on touse first places the excluded observations at the top,
		* where their (missing) flag adds nothing to the running sum
		quietly bysort `touse' `varlist': generate long `id' = (_n == 1) if `touse'
	}
	else {
		bysort `varlist': generate long `id' = (_n == 1)
	}

	* Step 2: running sum of the flags gives the group number
	quietly replace `id' = sum(`id')

	* Step 3: excluded observations carry no group
	if (`restricted') {
		quietly replace `id' = . if (`touse' != 1)
	}

	quietly compress `id'
end


program define Group_FirstPrinciplesSorted
	* Same numbering as the sort-based method, for data that the caller has
	* already verified to be sorted by `varlist' (only -by- is used here,
	* so the data are never re-sorted).
	*
	*   id(name)         name of the variable to create
	*   missing(string)  non-empty: missing values are treated as ordinary
	*                    values and form their own groups;
	*                    empty: observations with a missing value in any
	*                    variable of `varlist' receive a missing `id'
	*   verbose(#)       nonzero prints the method used
	syntax varlist, id(name) [missing(string) Verbose(integer 0)]
	if (`verbose') {
		di as smcl "{txt}(method: {res}stata_sorted{txt})"
	}

	if ("`missing'" != "") {
		* Missing values count as regular levels: number every by-group
		by `varlist': gen long `id' = (_n == 1)
		qui replace `id' = sum(`id')
	}
	else {
		* Build the expression "mi(v1) | mi(v2) | ..." flagging observations
		* that have a missing value in at least one grouping variable
		mata: st_local("exp", invtokens("mi(" :+ tokens("`varlist'") :+ ")", " | "))
		tempvar hasmv
		gen byte `hasmv' = `exp'

		* hasmv is constant within a by-group, so whole groups are skipped;
		* their flag is missing, which sum() treats as zero
		qui by `varlist': gen long `id' = (_n == 1) if !`hasmv'
		qui replace `id' = sum(`id')
		qui replace `id' = . if `hasmv'
	}
	qui compress `id'
end


* The statements below are outside any program definition.
* Locate ftools.mata along the adopath; findfile stores the full path in r(fn)
findfile "ftools.mata"
* Include it right away, before anything else can overwrite r(fn).
* NOTE(review): presumably this is what makes Mata factor() available to
* fegen_group -- confirm against ftools.mata
include "`r(fn)'"
exit
